0.1 PCA on annuli stats using \(L_1\) Norm

## Load annuli statistics computed with the L1 norm (no header row in the csv).
naL1 <- fread("rorb_annuli_stats_GoogleDoc.csv", header = FALSE)
cnames <- colnames(naL1)

## Read the metadata sheet ONCE and derive the location and id columns from
## it, instead of re-reading the same csv three times as before.
meta <- fread("GoogleDocData.csv")
loc <- meta[, 1:3]              # first three columns: spatial location
ids <- as.data.frame(meta[[4]]) # fourth column: object ids
colnames(ids) <- 'id'

## One colour per channel, replicated across the 15 annuli of each channel.
ccolL1 <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'black', 'green')
ccolL1 <- rep(ccolL1, each = 15)

## Column names: <channel>_<annulus>, zero-padded so they sort naturally.
CHAN_NAMES <- rep(c('DAPI1', 'DAPI2', 'DAPI3', 'GABA', 'GAD2', 'Gephyrin', 'GluN1',
                    'MBP', 'PSD95', 'synapsin', 'TdTomato', 'VGlut1'), each = 15)
colnames(naL1) <- paste0(CHAN_NAMES, "_", sprintf("%02d", 1:15))

## Standardize each feature, then PCA; the response is the GABA label.
snaL1 <- scale(naL1, center = TRUE, scale = TRUE)
pc1 <- princomp(snaL1)
gaba <- as.factor(meta$gaba)

0.2 LDA and QDA

Next, we apply LDA, QDA, and random forests, iterating over the number of PCA dimensions included.

## Fit one LDA model per number of leading principal components (1..p),
## in parallel across the registered foreach backend.
ldaL1 <- foreach(x = seq_len(ncol(snaL1))) %dopar% {
  lda(gaba ~ pc1$scores[, 1:x], CV = FALSE)
}

## Fit one QDA model per number of leading PCs. QDA needs a full-rank
## within-class covariance, so fits can fail for larger x; failures are
## recorded as NA rather than aborting the loop. `gaba` is already a
## factor (see the setup chunk), so no as.factor() is needed here —
## consistent with the ldaL1 block above.
qdaL1 <- foreach(x = seq_len(ncol(snaL1))) %dopar% {
  tryCatch(qda(gaba ~ pc1$scores[, 1:x], CV = FALSE),
           error = function(err) NA)
}

## Random forest per number of leading PCs. drop = FALSE keeps the score
## subset a matrix (with its Comp.* column names) even when x == 1, where
## `[, 1:x]` would otherwise collapse to an unnamed vector.
rfL1 <- foreach(x = seq_len(ncol(snaL1))) %dopar% {
  tmpdat <- data.table(gaba = gaba, pc1$scores[, 1:x, drop = FALSE])
  randomForest(gaba ~ ., data = tmpdat, prox = TRUE)
}

## Apparent error (LDA/QDA: training predictions; RF: OOB predictions)
## for each choice of d. Failed QDA fits stay NA so that ql[d] still
## corresponds to d principal components and the x-axis stays aligned.
ll <- sapply(ldaL1, function(fit) sum(predict(fit)$class != gaba)/length(gaba))
ql <- sapply(qdaL1, function(fit) {
  if (inherits(fit, "qda")) sum(predict(fit)$class != gaba)/length(gaba) else NA
})
rl <- sapply(rfL1, function(fit) sum(predict(fit) != gaba)/length(gaba))

plot(x = 1:ncol(pc1$scores),
     y = seq(0, max(ll, ql, rl, na.rm = TRUE), length = ncol(pc1$scores)),
     type = 'n', xlab = expression(hat(d)), ylab = "L")
points(ll, type = 'b', col = 'blue', pch = 20, cex = 0.5)
points(ql, type = 'b', col = 'orange', pch = 15, cex = 0.5)
points(rl, type = 'b', col = 'darkgreen', lty = 2, pch = 24, cex = 0.2)
## Chance error: proportion of the positive class.
abline(h = chance <- sum(gaba == 1)/length(gaba), col = 'magenta', lty = 4)
text(40, min(ql, na.rm = TRUE),
     label = paste("qda", min(round(ql, 3), na.rm = TRUE)), col = 'orange', pos = 4)
text(ncol(snaL1)/2, min(ll), label = paste("lda", min(round(ll, 3))), col = 'blue', pos = 1)
text(ncol(snaL1)/3, min(rl), label = paste("rf", min(round(rl, 3))), col = 'darkgreen')
text(ncol(snaL1)/4, chance, label = "chance", col = 'magenta', pos = 3)

0.3 PCA on annuli stats using \(L_{\infty}\) Norm

## Load annuli statistics computed with the L-infinity norm (5 annuli per
## channel instead of 15).
naInf <- fread("rorb_annuli_stats_GoogleDocInf.csv", header = FALSE)
cnames <- colnames(naInf)

## Read the metadata sheet ONCE and derive the location and id columns
## from it, instead of re-reading the same csv three times.
meta <- fread("GoogleDocData.csv")
loc <- meta[, 1:3]              # first three columns: spatial location
ids <- as.data.frame(meta[[4]]) # fourth column: object ids
colnames(ids) <- 'id'

## One colour per channel, replicated across the 5 annuli of each channel.
ccolLinf <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'black', 'green')
ccolLinf <- rep(ccolLinf, each = 5)

CHAN_NAMES <- rep(c('DAPI1', 'DAPI2', 'DAPI3', 'GABA', 'GAD2', 'Gephyrin', 'GluN1',
                    'MBP', 'PSD95', 'synapsin', 'TdTomato', 'VGlut1'), each = 5)
colnames(naInf) <- paste0(CHAN_NAMES, "_", sprintf("%d", 1:5))

## annotation ids in the BOSS are +1 from the ids in Forrest's gsheet.
#annoSizes <- read.csv('annotation_sizes_pixels.csv')
#annoSizes$id <- annoSizes$id - 1
#
#tmp <- merge(ids, annoSizes, by = "id", all.x = TRUE)

## Standardize each feature, then PCA; the response is the GABA label.
snaLinf <- scale(naInf, center = TRUE, scale = TRUE)
pcInf <- princomp(snaLinf)
gaba <- as.factor(meta$gaba)

0.4 LDA and QDA

Next, we apply LDA, QDA, and random forests, iterating over the number of PCA dimensions included.

## Fit one LDA model per number of leading principal components (1..p),
## in parallel across the registered foreach backend.
ldaLinf <- foreach(x = seq_len(ncol(snaLinf))) %dopar% {
  lda(gaba ~ pcInf$scores[, 1:x], CV = FALSE)
}

## Fit one QDA model per number of leading PCs; fits that fail (singular
## within-class covariance) are recorded as NA rather than aborting.
## `gaba` is already a factor, so no as.factor() is needed — consistent
## with the ldaLinf block above.
qdaLinf <- foreach(x = seq_len(ncol(snaLinf))) %dopar% {
  tryCatch(qda(gaba ~ pcInf$scores[, 1:x], CV = FALSE),
           error = function(err) NA)
}

## Random forest per number of leading PCs. drop = FALSE keeps the score
## subset a matrix (with its Comp.* column names) even when x == 1, where
## `[, 1:x]` would otherwise collapse to an unnamed vector.
rfLinf <- foreach(x = seq_len(ncol(snaLinf))) %dopar% {
  tmpdat <- data.table(gaba = gaba, pcInf$scores[, 1:x, drop = FALSE])
  randomForest(gaba ~ ., data = tmpdat, prox = TRUE)
}

## Apparent error (LDA/QDA: training predictions; RF: OOB predictions)
## for each choice of d. BUGFIX: unlike the L1 section, the original had
## no NA guard here, so predict() would error if any QDA fit had failed.
## Failed fits now yield NA, keeping ql[d] aligned with d.
ll <- sapply(ldaLinf, function(fit) sum(predict(fit)$class != gaba)/length(gaba))
ql <- sapply(qdaLinf, function(fit) {
  if (inherits(fit, "qda")) sum(predict(fit)$class != gaba)/length(gaba) else NA
})
rl <- sapply(rfLinf, function(fit) sum(predict(fit) != gaba)/length(gaba))

plot(x = 1:ncol(pcInf$scores),
     y = seq(0, max(ll, ql, rl, na.rm = TRUE), length = ncol(pcInf$scores)),
     type = 'n', xlab = expression(hat(d)), ylab = "L")
points(ll, type = 'b', col = 'blue', pch = 20, cex = 0.5)
points(ql, type = 'b', col = 'orange', pch = 15, cex = 0.5)
points(rl, type = 'b', col = 'darkgreen', lty = 2, pch = 24, cex = 0.2)
## Chance error: proportion of the positive class.
abline(h = chance <- sum(gaba == 1)/length(gaba), col = 'magenta', lty = 4)
text(40, min(ql, na.rm = TRUE),
     label = paste("qda", min(round(ql, 3), na.rm = TRUE)), col = 'orange', pos = 4)
text(ncol(snaLinf)/2, min(ll), label = paste("lda", min(round(ll, 3))), col = 'blue', pos = 1)
text(ncol(snaLinf)/3, min(rl), label = paste("rf", min(round(rl, 3))), col = 'darkgreen')
text(ncol(snaLinf)/4, chance, label = "chance", col = 'magenta', pos = 3)

1 Results for \(L_1\)

1.1 1-d Heatmap

1.2 Location meda_plots

1.3 Outliers as given by randomForest

1.4 Correlation Matrix

1.5 Cumulative Variance with Elbows

1.6 Paired Hex-binned plot

1.7 Hierarchical GMM Classifications

1.8 Hierarchical GMM Dendrogram

1.9 Stacked Means

1.10 Cluster Means

1.11 MEDA on Annuli stats using \(L_{\infty}\) norm.

2 Results for \(L_{\infty}\)

2.1 1-d Heatmap

2.2 Location meda_plots

2.3 Outliers as given by randomForest

2.4 Correlation Matrix

2.5 Cumulative Variance with Elbows

2.6 Paired Hex-binned plot

2.7 Hierarchical GMM Classifications

2.8 Hierarchical GMM Dendrogram

2.9 Stacked Means

2.10 Cluster Means